Purpose:
Calculate and plot mutational signatures for all samples using the COSMIC (v2 and v3.3) signatures and the Alexandrov et al., 2013 mutational signatures.
To run this from the command line, use: Rscript -e "rmarkdown::render('analyses/mutational-signatures/mutational_signatures.Rmd', clean = TRUE)"
This assumes you are in the top directory of the repository.
Import necessary functions.
# Magrittr pipe
`%>%` <- dplyr::`%>%`
# Import specialized functions
source(file.path("util", "mut_sig_functions.R"))
# Load deconstructSigs (signature fitting) and the tidyverse
library(deconstructSigs)
library(tidyverse)
── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.0 ✔ purrr 0.3.2
✔ tibble 2.1.3 ✔ dplyr 0.8.3
✔ tidyr 0.8.3 ✔ stringr 1.4.0
✔ readr 1.3.1 ✔ forcats 0.4.0
── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
Set up directory paths.
# Directories local to this analysis
input_dir <- "input"
results_dir <- "results"
plots_dir <- "plots"

# One plot subdirectory per signature reference set
cosmicv2_plots <- file.path(plots_dir, "cosmicv2")
cosmicv3_plots <- file.path(plots_dir, "cosmicv3")
nature_plots <- file.path(plots_dir, "nature")

# Shared directories that live two levels above this analysis
data_dir <- file.path("..", "..", "data")
figures_dir <- file.path("..", "..", "figures")
Make new directories for the results.
# The results directory is a single level, so no recursive creation is needed
if (!dir.exists(results_dir)) {
  dir.create(results_dir)
}

# The plot subdirectories are nested under plots_dir, so create any missing
# parent directory along the way
for (plot_subdir in c(cosmicv2_plots, nature_plots, cosmicv3_plots)) {
  if (!dir.exists(plot_subdir)) {
    dir.create(plot_subdir, recursive = TRUE)
  }
}
# Location of the consensus MAF file inside the data directory
consensus_file <- file.path(
  data_dir,
  "snv-consensus-plus-hotspots.maf.tsv.gz"
)
Read in the consensus MAF file.
# Load the consensus MAF as a plain data.frame rather than a data.table
maf <- data.table::fread(
  input = consensus_file,
  data.table = FALSE
)
Registered S3 method overwritten by 'R.oo':
method from
throw.default R.methodsS3
Read in the histology colors and labels.
# Per-biospecimen display group, display order, and hex color used in the plots
histology_label_mapping <-
  file.path(figures_dir, "palettes", "histology_label_color_table.tsv") %>%
  readr::read_tsv() %>%
  # Keep only the columns needed for plotting
  dplyr::select(
    Kids_First_Biospecimen_ID,
    display_group,
    display_order,
    hex_codes
  ) %>%
  # Turn display_group into a factor whose levels follow display_order
  dplyr::mutate(
    display_group = forcats::fct_reorder(display_group, display_order)
  )
Parsed with column specification:
cols(
Kids_First_Biospecimen_ID = col_character(),
sample_type = col_character(),
integrated_diagnosis = col_character(),
Notes = col_character(),
harmonized_diagnosis = col_character(),
broad_histology = col_character(),
short_histology = col_character(),
display_group = col_character(),
n = col_double(),
display_order = col_double(),
hex_codes = col_character()
)
Set up gradient color palette for the bubble matrix plots.
# Named hex codes for the gradient palette
gradient_col_palette <-
  file.path(figures_dir, "palettes", "gradient_color_palette.tsv") %>%
  readr::read_tsv()
Parsed with column specification:
cols(
color_names = col_character(),
hex_codes = col_character()
)
# The NA color is not used in these plots, so drop that row from the palette
gradient_col_palette <- dplyr::filter(
  gradient_col_palette,
  color_names != "na_color"
)
Read in the metadata and set it up with the color palette.
# Sample metadata keyed by Tumor_Sample_Barcode, restricted to biospecimens that
# have an entry in the histology color/label table
metadata_df <- file.path(data_dir, "histologies.tsv") %>%
  readr::read_tsv(guess_max = 10000) %>%
  # Rename the ID column while selecting so it matches the MAF sample column
  dplyr::select(
    Tumor_Sample_Barcode = Kids_First_Biospecimen_ID,
    experimental_strategy
  ) %>%
  dplyr::inner_join(
    histology_label_mapping,
    by = c("Tumor_Sample_Barcode" = "Kids_First_Biospecimen_ID")
  )
Parsed with column specification:
cols(
.default = col_character(),
OS_days = col_double(),
age_last_update_days = col_double(),
cns_methylation_class_score = col_double(),
cns_methylation_subclass_score = col_double(),
RF_purity_ABSOLUTE = col_double(),
RF_purity_ESTIMATE = col_double(),
RF_purity_LUMP = col_double(),
normal_fraction = col_double(),
tumor_fraction = col_double(),
tumor_ploidy = col_double()
)
See spec(...) for full column specifications.
Read in the per-sample region sizes (from the TMB results file) so they can be used as the Mb denominator.
# Per-sample region sizes, taken from the TMB results table
region_sizes <-
  file.path(data_dir, "snv-mutation-tmb-all.tsv") %>%
  readr::read_tsv() %>%
  dplyr::select(Tumor_Sample_Barcode, region_size)
Parsed with column specification:
cols(
Tumor_Sample_Barcode = col_character(),
experimental_strategy = col_character(),
cancer_group = col_character(),
mutation_count = col_double(),
region_size = col_double(),
tmb = col_double()
)
Determine how many mutations we have per sample.
# Number of MAF rows per sample, sorted from fewest to most
mut_per_sample <- dplyr::count(maf, Tumor_Sample_Barcode) %>%
  dplyr::arrange(n)

# Summary statistics of the per-sample counts
summary(mut_per_sample[["n"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 67 896 2988 1902 1079953
Plot the distribution of the number of mutations per sample.
# Density plot of the number of mutations per sample.
# Only `x` is mapped: `geom` is not an aesthetic, so it does not belong in
# aes(). The geometry is chosen by the geom_density() layer.
ggplot2::ggplot(mut_per_sample, ggplot2::aes(x = n)) +
  ggplot2::geom_density() +
  ggplot2::theme_classic()
Make mutation data into deconstructSigs input format.
# Build the deconstructSigs input from the MAF: each argument below names the
# MAF column that supplies the sample ID, chromosome, position, and alleles,
# and the hg38 BSgenome object supplies the reference sequence
sigs_input <- deconstructSigs::mut.to.sigs.input(
  mut.ref = maf,
  sample.id = "Tumor_Sample_Barcode",
  chr = "Chromosome",
  pos = "Start_Position",
  ref = "Reference_Allele",
  alt = "Allele",
  bsg = BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
)
Warning in mut.to.sigs.input(mut.ref = maf, sample.id = "Tumor_Sample_Barcode", : Some samples have fewer than 50 mutations:
BS_X0KN4VVW, BS_7KR13R3P, BS_P7VR731D, TARGET-30-PASMJG-01A-01D, TARGET-30-PASUYL-01A-01D, TARGET-30-PASYYX-01A-01D, TARGET-30-PATYPH-01A-01D, TARGET-30-PATLXP-01A-01D, TARGET-30-PATVWA-01A-01D, TARGET-30-PATMAW-01A-01D, TARGET-30-PATVTL-01A-01D, TARGET-30-PAUBPW-09A-01D, TARGET-30-PATNRI-01A-01D, TARGET-30-PAMMXF-01A-01W, TARGET-30-PAUGJI-01A-01D, TARGET-30-PASRSG-01A-01D, TARGET-30-PATXHW-01A-01D, TARGET-30-PAPBJT-01A-01W, TARGET-30-PASVWG-01A-01D, TARGET-30-PAUCFI-01A-01D, TARGET-30-PASAFG-01A-01D, TARGET-30-PASLMN-01A-01D, TARGET-30-PARXMM-01A-01D, TARGET-30-PATBJI-01A-01D, TARGET-30-PASPVR-09A-01D, TARGET-30-PATKSX-01A-01D, TARGET-30-PAVEZM-01A-01D, TARGET-20-PANSBH-04A-01D, TARGET-30-PAUWFE-01A-01D, TARGET-30-PAPRMJ-01A-01D, TARGET-30-PARVZT-01A-01D, TARGET-30-PASUEA-01A-01D, TARGET-30-PAIXNC-01A-01W, TARGET-30-PASEWZ-01A-01D, TARGET-30-PAUWDK-01A-01D, TARGET-30-PAULVH-01A-01D, TARGET-30-PATWGR-01A-01D, TARGET-30-PASZTV-01A-01D, TARGET-30-PATSXC-01A-01D, TARGET-30-PASBZV-01A-01D, TARGET-30-PATYEJ-01A-01D, TARGET-30-PASXMI-09A-01D, TARGET-30-PATPXJ-01A-01D, TARGET-30-PATSDR-01A-01D, TARGET-30-PAUHYY-01A-01D, TARGET-30-PATXKG-01A-01D, TARGET-30-PATBAC-01A-01D, TARGET-30-PASEJZ-01A-01D, TARGET-30-PAUKLK-01A-01D, TARGET-30-PASAAB-01A-01D, TARGET-30-PAUGIP-01A-01D, TARGET-30-PALXMM-01A-01W, TARGET-30-PASVKL-09A-01D, TARGET-30-PATNEA-01A-01D, TARGET-30-PASPVZ-09A-01D, TARGET-30-PATKGB-01A-01D, TARGET-30-PAMBAC-01A-01W, TARGET-30-PASLCD-01A-01D, TARGET-30-PAUHSJ-01A-01D, TARGET-30-PAICGF-01A-01W, TARGET-30-PATFPS-09A-01D, TARGET-30-PATGLU-01A-01D, TARGET-30-PANZPV-01A-01W, TARGET-30-PAUZSB-01A-01D, TARGET-30-PATBHY-01A-01D, TARGET-30-PATSKE-01A-01D, TARGET-30-PASATF-01A-01D, TARGET-20-PASRTP-09A-01D, TARGET-30-PAIXNV-01A-01W, TARGET-30-PATVDI-01A-01D, TARGET-30-PAUMXC-01A-01D, TARGET-30-PASEGF-01A-01D, TARGET-30-PASPTF-01A-01D, TARGET-30-PAUHHW-01A-01D, TARGET-30-PATXHC-01A-01D, TARGET-30-PAUGZD-01A-01D, TARGET-30-PATLNM-01A-01D, TARGET-30-PATTPL-01A-01D, 
TARGET-30-PASZST-01A-01D, TARGET-30-PASTXV-01A-01D, TARGET-30-PAUNST-01A-01D, TARGET-30-PASFDU-01A-01D, TARGET-30-PAUWXY-01A-01D, TARGET-30-PATJXV-01A-01D, TARGET-30-PARUTX-01A-01D, TARGET-30-PATINJ-01A-01D, TARGET-30-PAVCLI-01A-01D, TARGET-30-PATUVB-01A-01D, TARGET-30-PAITCI-01A-01W, TARGET-30-PAVCJZ-01A-01D, TARGET-30-PAUTVX-01A-01D, TARGET-30-PAUHIK-01A-01D, TARGET-30-PAUPRN-01A-01D, TARGET-30-PATDSY-01A-01D, TARGET-30-PAUUZU-01A-01D, TARGET-30-PALBFW-01A-01W, TARGET-30-PAUFUS-01A-01D, TARGET-30-PASKYH-01A-01D, TARGET-30-PASYJF-01A-01D, TARGET-30-PATNCI-01A-01D, TARGET-30-PAMVRA-01A-01W, TARGET-30-PASRLC-01A-01D, TARGET-30-PASFKX-01A-01D, TARGET-30-PATFES-01A-01D, TARGET-30-PAMZMG-01A-01W, TARGET-30-PAKYZS-01A-01W, TARGET-30-PARSVF-01A-01D, TARGET-30-PANSBN-01A-01D, TARGET-30-PALTYB-01A-01W, TARGET-30-PASCWD-01A-01D, TARGET-30-PASMUB-01A-01D, TARGET-30-PATRXC-01A-01D, TARGET-30-PATTMM-01A-01D, TARGET-30-PATNRK-09A-01D, TARGET-30-PAUBSW-01A-01D, TARGET-30-PATYMS-01A-01D, TARGET-30-PASSEC-01A-01D, TARGET-30-PASBPN-01A-01D, TARGET-30-PASBMW-01A-01D, TARGET-30-PASEAR-01A-01D, TARGET-30-PAPHPE-01A-01W, TARGET-30-PATXWS-01A-01D, TARGET-30-PASBDN-01A-01D, TARGET-30-PATJHU-01A-01D, TARGET-30-PATAFE-01A-01D, TARGET-30-PASCZY-01A-01D, TARGET-30-PARWEH-01A-01D, TARGET-30-PASTIJ-01A-01D, TARGET-30-PASPIK-01A-01D, TARGET-30-PAVEKN-01A-01D, TARGET-30-PATNWL-01A-01D, TARGET-30-PARXVA-01A-01D, TARGET-30-PAUDIK-01A-01D, TARGET-30-PAUDMU-01A-01D, TARGET-30-PASLRM-01A-01D, TARGET-30-PAUNTY-01A-01D, TARGET-30-PASWVE-01A-01D, TARGET-30-PATRJG-01A-01D, TARGET-30-PASFCG-01A-01D, TARGET-30-PAUWYM-01A-01D, TARGET-30-PARYEH-01A-01D, TARGET-30-PAUELT-01A-01D, TARGET-30-PARJMX-01A-01D, TARGET-30-PANRHJ-01A-01W, TARGET-30-PAUIFL-01A-01D, TARGET-30-PAPUJU-01A-01D, TARGET-30-PATZBH-01A-01D, TARGET-30-PAUMMZ-01A-01D, TARGET-30-PAIXRK-01A-01W, TARGET-30-PATYWX-01A-01D, TARGET-30-PASEWX-01A-01D, TARGET-30-PAUXIW-09A-01D, TARGET-30-PAUFFP-01A-01D, TARGET-30-PAPRPR-01A-01D, 
TARGET-30-PAUAZA-01A-01D, TARGET-30-PATNZJ-01A-01D, TARGET-30-PAUPIC-01A-01D, TARGET-30-PALUDH-01A-01W, TARGET-30-PATYMK-01A-01D, TARGET-30-PATMTX-01A-01D, TARGET-30-PATEWM-01A-01D, TARGET-30-PASCRK-01A-01D, TARGET-30-PAULNF-01A-01D, TARGET-30-PATRHD-01A-01D, TARGET-30-PATZRU-01A-01D, TARGET-30-PAURPL-01A-01D, TARGET-30-PAUCGP-01A-01D, TARGET-30-PASKRX-01A-01D, TARGET-30-PASZFX-01A-01D, TARGET-30-PARNNG-01A-01D, TARGET-30-PAUIHH-01A-01D, TARGET-30-PASJTA-01A-01D, TARGET-30-PASWKI-01A-01D, TARGET-30-PALSAE-01A-01W, TARGET-30-PATCJP-09A-01D, TARGET-30-PAUBRR-01A-01D, TARGET-30-PAUEYW-09A-01D, TARGET-30-PAUKIJ-01A-01D, TARGET-30-PASLPG-01A-01D, TARGET-30-PASJWU-01A-01D, TARGET-30-PATSPZ-01A-01D, TARGET-30-PARYVD-01A-01D, TARGET-30-PATEKG-01A-01D, TARGET-30-PATFTR-01A-01D, TARGET-30-PAUMBB-01A-01D, TARGET-30-PAUJLH-01A-01D, TARGET-30-PATMSI-01A-01D, TARGET-30-PATYMZ-01A-01D, TARGET-30-PASLTC-01A-01D, TARGET-30-PASFWL-01A-01D, TARGET-30-PASAZZ-01A-01D, TARGET-30-PASMDG-01A-01D, TARGET-30-PASFEV-01A-01D, TARGET-30-PAUSXH-01A-01D, TARGET-30-PAUUGT-01A-01D, TARGET-30-PATCDJ-01A-01D, TARGET-30-PATHYK-01A-01D, TARGET-30-PAVDNK-01A-01D, TARGET-30-PAUJPC-01A-01D, TARGET-30-PASAHC-01A-01D, TARGET-30-PATMXC-01A-01D, TARGET-30-PAKIPY-01A-01W, TARGET-30-PARVHG-01A-01D, TARGET-30-PAVCKK-01A-01D, TARGET-30-PAURCG-01A-01D, TARGET-30-PASKSR-01A-01D, TARGET-30-PATVJX-01A-01D, TARGET-20-PANLRE-04A-01D, TARGET-30-PAKVUY-01A-01D, TARGET-30-PATFMU-01A-01D, TARGET-30-PARTCE-01A-01D, TARGET-30-PATBPG-01A-01D, TARGET-30-PASFDV-01A-01D, TARGET-30-PASBJY-01A-01D, TARGET-30-PAUGRP-01A-01D, TARGET-30-PASNUI-01A-01D, TARGET-30-PASUMG-01A-01D, TARGET-30-PASEDP-01A-01D, TARGET-30-PATMSR-01A-01D, TARGET-30-PARYWX-01A-01D, TARGET-30-PATYCM-01A-01D, TARGET-30-PATLCM-01A-01D, TARGET-30-PATMRZ-01A-01D, TARGET-30-PASLDM-01A-01D, TARGET-30-PAUBEC-01A-01D, TARGET-30-PATCKU-01A-01D, TARGET-30-PAUBVN-01A-01D, TARGET-30-PARXPD-01A-01D, TARGET-30-PARWEV-01A-01D, TARGET-30-PASNMJ-01A-01D, 
TARGET-30-PASKSX-01A-01D, TARGET-30-PATSRD-01A-01D, TARGET-30-PANKFE-01A-01W, TARGET-30-PATYWM-01A-01D, TARGET-30-PASGHN-01A-01D, TARGET-30-PAUZMG-01A-01D, TARGET-30-PASCEW-09A-01D, TARGET-30-PARVWL-01A-01D, TARGET-30-PARZZD-01A-01D, TARGET-30-PASLXS-01A-01D, TARGET-30-PATDVF-01A-01D, TARGET-30-PAUPGV-01A-01D, TARGET-30-PAUWEV-01A-01D, TARGET-30-PATTPW-01A-01D, TARGET-30-PAUBYW-01A-01D, TARGET-30-PASYIP-01A-01D, TARGET-30-PASJUU-01A-01D, TARGET-30-PAUBFU-01A-01D, TARGET-30-PALJUV-01A-01W, TARGET-30-PASCIX-09A-01D, TARGET-30-PASGNT-01A-01D, TARGET-30-PAPVFD-01A-01D, TARGET-30-PATTPC-01A-01D, TARGET-30-PASGEE-01A-01D, TARGET-30-PATNGD-01A-01D, TARGET-30-PAUXSZ-01A-01D, TARGET-30-PASDRV-01A-01D, TARGET-30-PASKRS-01A-01D, TARGET-30-PASGGI-01A-01D, TARGET-30-PATTFB-01A-01D, TARGET-30-PANBSP-01A-01W, TARGET-30-PASLIH-01A-01D, TARGET-30-PATPET-01A-01D, TARGET-30-PATYDC-01A-01D, TARGET-30-PATUZF-01A-01D, TARGET-30-PASSII-01A-01D, TARGET-30-PASTHF-01A-01D, TARGET-30-PASYTP-01A-01D, TARGET-30-PASUCU-01A-01D, TARGET-30-PAUGNL-01A-01D, TARGET-30-PARYSR-01A-01D, TARGET-30-PARXAX-01A-01D, TARGET-30-PASMET-01A-01D, TARGET-30-PATIHB-01A-01D, TARGET-30-PARLMK-01A-01D, TARGET-30-PATMJV-09A-01D, TARGET-30-PAUITU-01A-01D, TARGET-30-PASXCG-01A-01D, TARGET-30-PATRUX-09A-01D, TARGET-30-PARABJ-01A-01D, TARGET-30-PALXHW-01A-01W, TARGET-30-PASYYM-01A-01D, TARGET-30-PANWRR-01A-01D, TARGET-30-PARTPF-01A-01D, TARGET-30-PATUEH-01A-01D, TARGET-30-PASKRA-01A-01D, TARGET-30-PASUTC-01A-01D, TARGET-30-PATLLI-01A-01D, TARGET-30-PATMPC-01A-01D, TARGET-30-PATISU-01A-01D, TARGET-30-PATDFU-01A-01D, TARGET-30-PASJWG-01A-01D, TARGET-30-PAKHCF-01A-01W, TARGET-30-PAUTKP-01A-01D, TARGET-30-PARZZC-01A-01D, TARGET-30-PATSJV-01A-01D, TARGET-30-PASLYF-01A-01D, TARGET-30-PAPVRN-01A-01D, TARGET-30-PATWWJ-01A-01D, TARGET-30-PASMPT-01A-01D, TARGET-30-PAVCGD-01A-01D, TARGET-30-PAVALS-01A-01D, TARGET-30-PASTGH-09A-01D, TARGET-30-PATFTN-01A-01D, TARGET-30-PATPJD-09A-01D, TARGET-30-PAPTLV-01A-01D, 
TARGET-30-PASPXU-01A-01D, TARGET-30-PASLAE-01A-01D, TARGET-30-PASPGU-01A-01D, TARGET-30-PARYRJ-01A-01D, TARGET-30-PAUXFZ-01A-01D, TARGET-30-PAIFCS-01A-01W, TARGET-30-PASRWE-01A-01D, TARGET-30-PATDXG-01A-01D, TARGET-30-PAUKRF-01A
Calculate the total number of mutations per sample.
# Count the total number of signature mutations for each sample.
# rowSums() is the vectorized form of apply(x, 1, sum) and accepts a numeric
# data frame directly; the result is a numeric vector named by sample
# (the row names of sigs_input).
total_muts <- rowSums(sigs_input)
Get list of tumor sample ids.
# Sample IDs from the MAF (in order of first appearance) that also have a row
# in the deconstructSigs input
tumor_sample_ids <- maf %>%
  dplyr::distinct(Tumor_Sample_Barcode) %>%
  dplyr::filter(Tumor_Sample_Barcode %in% rownames(sigs_input)) %>%
  dplyr::pull(Tumor_Sample_Barcode)
Get COSMIC v2 signatures for each sample. This step will take some time.
# Fit the COSMIC v2 signatures one sample at a time. Iterating over a vector
# that is named by itself makes lapply() return a list keyed by sample ID.
sample_sigs_cosmic <- lapply(
  stats::setNames(tumor_sample_ids, tumor_sample_ids),
  function(id) {
    # Determine the signatures contributing to this sample
    whichSignatures(
      tumor.ref = sigs_input,
      signatures.ref = signatures.cosmic,
      sample.id = id,
      contexts.needed = TRUE
    )
  }
)

# Pull the weights table out of every per-sample fit
cosmic_weights <- lapply(sample_sigs_cosmic, function(fit) fit[["weights"]])

# Stack the weights into one wide table (one row per sample), put the sample ID
# in the first column, and save it; write_tsv() passes the table through so it
# is also kept as cosmic_wide
cosmic_wide <- dplyr::bind_rows(cosmic_weights) %>%
  tibble::add_column(
    Kids_First_Biospecimen_ID = unlist(lapply(cosmic_weights, rownames)),
    .before = 1
  ) %>%
  tibble::as_tibble() %>%
  readr::write_tsv(file.path(results_dir, "COSMICv2_signature_exposures.tsv"))

# Inactive code retained from the original notebook (long-format exposures):
# Create data frame of COSMIC signature weights
#cosmic_exposures <- cosmic_wide %>%
# tidyr::gather(-Kids_First_Biospecimen_ID, key = "signature", value = "exposure") %>%
# mutate(source = 'COSMIC')
Get Alexandrov et al, 2013 signatures for each sample.
# Fit the Alexandrov et al, 2013 signatures one sample at a time. Iterating over
# a vector that is named by itself makes lapply() return a list keyed by
# sample ID.
sample_sigs_nature <- lapply(
  stats::setNames(tumor_sample_ids, tumor_sample_ids),
  function(id) {
    # Determine the signatures contributing to this sample
    whichSignatures(
      tumor.ref = sigs_input,
      signatures.ref = signatures.nature2013,
      sample.id = id,
      contexts.needed = TRUE
    )
  }
)

# Pull the weights table out of every per-sample fit
nature_weights <- lapply(sample_sigs_nature, function(fit) fit[["weights"]])

# Stack the weights into one wide table (one row per sample), put the sample ID
# in the first column, and save it; write_tsv() passes the table through so it
# is also kept as nature_wide
nature_wide <- dplyr::bind_rows(nature_weights) %>%
  tibble::add_column(
    Kids_First_Biospecimen_ID = unlist(lapply(nature_weights, rownames)),
    .before = 1
  ) %>%
  tibble::as_tibble() %>%
  readr::write_tsv(file.path(results_dir, "Nature_signature_exposures.tsv"))

# Inactive code retained from the original notebook (long-format exposures):
# Create data frame of Nature signature weights
#nature_exposures <- nature_wide %>%
# tidyr::gather(-Kids_First_Biospecimen_ID, key = "signature", value = "exposure") %>%
# mutate(source = 'Nature')
Get COSMIC genome v3.3 signatures for each sample. This step will take some time.
# Load the COSMIC v3.3.1 SBS reference table and transpose it: the `Type`
# column becomes the row names before t(), so after transposing each row is one
# column of the input file and each column is one `Type` value
signatures.cosmic.v3.3 <-
  file.path(input_dir, "COSMIC_v3.3.1_SBS_GRCh38.txt") %>%
  readr::read_tsv() %>%
  tibble::column_to_rownames("Type") %>%
  t() %>%
  as.data.frame()
Parsed with column specification:
cols(
.default = col_double(),
Type = col_character()
)
See spec(...) for full column specifications.
# Fit the COSMIC v3.3 signatures one sample at a time. Iterating over a vector
# that is named by itself makes lapply() return a list keyed by sample ID.
sample_sigs_cosmic_v33 <- lapply(
  stats::setNames(tumor_sample_ids, tumor_sample_ids),
  function(id) {
    # Determine the signatures contributing to this sample
    whichSignatures(
      tumor.ref = sigs_input,
      signatures.ref = signatures.cosmic.v3.3,
      sample.id = id,
      contexts.needed = TRUE
    )
  }
)

# Pull the weights table out of every per-sample fit
cosmic_v33_weights <- lapply(
  sample_sigs_cosmic_v33,
  function(fit) fit[["weights"]]
)

# Stack the weights into one wide table (one row per sample), put the sample ID
# in the first column, and save it; write_tsv() passes the table through so it
# is also kept as cosmic_v33_wide
cosmic_v33_wide <- dplyr::bind_rows(cosmic_v33_weights) %>%
  tibble::add_column(
    Kids_First_Biospecimen_ID = unlist(lapply(cosmic_v33_weights, rownames)),
    .before = 1
  ) %>%
  tibble::as_tibble() %>%
  readr::write_tsv(file.path(results_dir, "COSMICv3.3_signature_exposures.tsv"))

# Inactive code retained from the original notebook (long-format exposures):
# Create data frame of COSMIC signature weights
#cosmic_v33_exposures <- cosmic_v33_wide %>%
# tidyr::gather(-Kids_First_Biospecimen_ID, key = "signature", value = "exposure") %>%
# readr::write_tsv(file.path(results_dir, 'COSMICv3.3_signature_exposures.tsv'))
# Plot the fitted signatures for every sample, once per reference set
# (COSMIC v2, Alexandrov et al, 2013, COSMIC v3.3). Each call gets the list of
# per-sample fits, a short label, and its own output directory.
# NOTE(review): sample_mut_sig_plot() is not defined in this notebook
# (presumably it comes from the sourced util/mut_sig_functions.R); confirm
# there how label and output_dir are used and whether output_dir is created.
sample_mut_sig_plot(
sample_sigs_cosmic,
label = "cosmicv2",
output_dir = file.path(cosmicv2_plots, "individual_mutation_sig")
)
sample_mut_sig_plot(
sample_sigs_nature,
label = "nature",
output_dir = file.path(nature_plots, "individual_mutation_sig")
)
sample_mut_sig_plot(
sample_sigs_cosmic_v33,
label = "cosmicv3",
output_dir = file.path(cosmicv3_plots, "individual_mutation_sig")
)
Do this for COSMIC v2 mutation signatures.
# Calculate mutations per signature for the COSMIC v2 fits.
# Inputs: the per-sample fits, the total signature-mutation count per sample,
# the per-sample region sizes, and the sample metadata (with display colors).
# NOTE(review): calc_mut_per_sig() is not defined in this notebook (presumably
# it comes from the sourced util/mut_sig_functions.R); confirm there how these
# inputs are combined.
cosmic_sigs_df <- calc_mut_per_sig(
sample_sigs_cosmic,
muts_per_sample = total_muts,
region_size = region_sizes,
metadata = metadata_df
)
Using Tumor_Sample_Barcode, experimental_strategy, display_group, hex_codes as id variables
# Save the COSMIC v2 results without the plotting color column
readr::write_tsv(
  dplyr::select(cosmic_sigs_df, -hex_codes),
  file.path(results_dir, "cosmic_signatures_results.tsv")
)

# Show a preview of the full table (color column included)
cosmic_sigs_df
Do this for COSMIC v3.3 mutation signatures.
# Calculate mutations per signature for the COSMIC v3.3 fits.
# Inputs: the per-sample fits, the total signature-mutation count per sample,
# the per-sample region sizes, and the sample metadata (with display colors).
# NOTE(review): calc_mut_per_sig() is not defined in this notebook (presumably
# it comes from the sourced util/mut_sig_functions.R); confirm there how these
# inputs are combined.
cosmicv3_sigs_df <- calc_mut_per_sig(
sample_sigs_cosmic_v33,
muts_per_sample = total_muts,
region_size = region_sizes,
metadata = metadata_df
)
Using Tumor_Sample_Barcode, experimental_strategy, display_group, hex_codes as id variables
# Save the COSMIC v3.3 results without the plotting color column
readr::write_tsv(
  dplyr::select(cosmicv3_sigs_df, -hex_codes),
  file.path(results_dir, "cosmicv3_signatures_results.tsv")
)

# Show a preview of the full table (color column included)
cosmicv3_sigs_df
Do this for Alexandrov et al, 2013 mutation signatures.
# Calculate mutations per signature for the Alexandrov et al, 2013 fits.
# Inputs: the per-sample fits, the total signature-mutation count per sample,
# the per-sample region sizes, and the sample metadata (with display colors).
# NOTE(review): calc_mut_per_sig() is not defined in this notebook (presumably
# it comes from the sourced util/mut_sig_functions.R); confirm there how these
# inputs are combined.
nature_sigs_df <- calc_mut_per_sig(
sample_sigs_nature,
muts_per_sample = total_muts,
region_size = region_sizes,
metadata = metadata_df
)
Using Tumor_Sample_Barcode, experimental_strategy, display_group, hex_codes as id variables
# Save the Alexandrov et al, 2013 results without the plotting color column
readr::write_tsv(
  dplyr::select(nature_sigs_df, -hex_codes),
  file.path(results_dir, "nature_signatures_results.tsv")
)

# Show a preview of the full table (color column included)
nature_sigs_df
For COSMIC v2 signatures
# Draw the bubble matrix for the COSMIC v2 results with the gradient palette.
# NOTE(review): bubble_matrix_plot() is not defined in this notebook
# (presumably from the sourced util/mut_sig_functions.R). The label passed here
# is the same text as the one used for the COSMIC v3.3 bubble plot; confirm
# whether the two plots should be distinguishable by label.
bubble_matrix_plot(cosmic_sigs_df,
label = "COSMIC Signatures",
color_palette = gradient_col_palette$hex_codes
)
Warning: Removed 204 rows containing missing values (geom_point).
# Save the most recently drawn plot (the COSMIC v2 bubble matrix) as a PNG
ggplot2::ggsave(
  filename = file.path(cosmicv2_plots, "bubble_matrix_cosmicv2_mutation_sig.png"),
  plot = ggplot2::last_plot(),
  width = 30,
  height = 20,
  units = "cm"
)
Warning: Removed 204 rows containing missing values (geom_point).
For Nature signatures
# Draw the bubble matrix for the Alexandrov et al, 2013 results with the
# gradient palette.
# NOTE(review): bubble_matrix_plot() is not defined in this notebook
# (presumably from the sourced util/mut_sig_functions.R); confirm there how
# label and color_palette are used.
bubble_matrix_plot(nature_sigs_df,
label = "Alexandrov et al, 2013 signatures",
color_palette = gradient_col_palette$hex_codes)
Warning: Removed 161 rows containing missing values (geom_point).
# Save the most recently drawn plot (the Alexandrov et al, 2013 bubble matrix)
# as a PNG
ggplot2::ggsave(
  filename = file.path(nature_plots, "bubble_matrix_nature_mutation_sig.png"),
  plot = ggplot2::last_plot(),
  width = 30,
  height = 20,
  units = "cm"
)
Warning: Removed 161 rows containing missing values (geom_point).
For COSMIC v3.3 signatures
# Draw the bubble matrix for the COSMIC v3.3 results with the gradient palette.
# NOTE(review): bubble_matrix_plot() is not defined in this notebook
# (presumably from the sourced util/mut_sig_functions.R). The label passed here
# is the same text as the one used for the COSMIC v2 bubble plot; confirm
# whether the two plots should be distinguishable by label.
bubble_matrix_plot(cosmicv3_sigs_df,
label = "COSMIC Signatures",
color_palette = gradient_col_palette$hex_codes
)
Warning: Removed 708 rows containing missing values (geom_point).
# Save the most recently drawn plot (the COSMIC v3.3 bubble matrix) as a PNG
ggplot2::ggsave(
  filename = file.path(cosmicv3_plots, "bubble_matrix_cosmicv3_mutation_sig.png"),
  plot = ggplot2::last_plot(),
  width = 30,
  height = 20,
  units = "cm"
)
Warning: Removed 708 rows containing missing values (geom_point).
We will make these plots for primary tumor samples only. Let's make these for COSMIC v2 mutation signatures first.
# Make one grouped bar plot per display group for the COSMIC v2 results
lapply(
  unique(cosmic_sigs_df$display_group),
  function(group) {
    grouped_sig_barplot(
      group,
      sig_num_df = cosmic_sigs_df,
      output_dir = file.path(cosmicv2_plots, "signature_grouped_barplots"),
      label = "cosmic_v2"
    )
  }
)
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Make these plots for Alexandrov et al, 2013 signatures.
# Make one grouped bar plot per display group for the Alexandrov et al, 2013
# results
lapply(
  unique(nature_sigs_df$display_group),
  function(group) {
    grouped_sig_barplot(
      group,
      sig_num_df = nature_sigs_df,
      output_dir = file.path(nature_plots, "signature_grouped_barplots"),
      label = "nature"
    )
  }
)
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Make these plots for COSMIC v3.3 signatures.
# Make one grouped bar plot per display group for the COSMIC v3.3 results
lapply(
  unique(cosmicv3_sigs_df$display_group),
  function(group) {
    grouped_sig_barplot(
      group,
      sig_num_df = cosmicv3_sigs_df,
      output_dir = file.path(cosmicv3_plots, "signature_grouped_barplots"),
      label = "cosmic_v3.3"
    )
  }
)
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
Saving 7 x 5 in image
# Record the R version, platform, and package versions used for this run
utils::sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux 9 (stretch)
Matrix products: default
BLAS/LAPACK: /usr/lib/libopenblasp-r0.2.19.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
[4] purrr_0.3.2 readr_1.3.1 tidyr_0.8.3
[7] tibble_2.1.3 ggplot2_3.2.0 tidyverse_1.2.1
[10] deconstructSigs_1.9.0
loaded via a namespace (and not attached):
[1] Biobase_2.46.0 httr_1.4.0
[3] jsonlite_1.6 R.utils_2.9.0
[5] modelr_0.1.4 assertthat_0.2.1
[7] stats4_3.6.0 BSgenome_1.54.0
[9] GenomeInfoDbData_1.2.2 cellranger_1.1.0
[11] Rsamtools_2.2.3 yaml_2.2.0
[13] pillar_1.4.2 backports_1.1.4
[15] lattice_0.20-38 glue_1.3.1
[17] digest_0.6.20 GenomicRanges_1.38.0
[19] XVector_0.26.0 rvest_0.3.4
[21] colorspace_1.4-1 plyr_1.8.4
[23] htmltools_0.3.6 Matrix_1.2-17
[25] R.oo_1.22.0 XML_3.98-1.20
[27] pkgconfig_2.0.2 broom_0.5.2
[29] haven_2.1.1 zlibbioc_1.32.0
[31] scales_1.0.0 BiocParallel_1.20.1
[33] generics_0.0.2 IRanges_2.20.2
[35] ellipsis_0.2.0.1 withr_2.1.2
[37] SummarizedExperiment_1.16.1 BiocGenerics_0.32.0
[39] lazyeval_0.2.2 cli_1.1.0
[41] magrittr_1.5 crayon_1.3.4
[43] readxl_1.3.1 evaluate_0.14
[45] R.methodsS3_1.7.1 nlme_3.1-140
[47] xml2_1.2.0 tools_3.6.0
[49] data.table_1.12.2 hms_0.4.2
[51] matrixStats_0.54.0 S4Vectors_0.24.4
[53] munsell_0.5.0 DelayedArray_0.12.3
[55] Biostrings_2.54.0 compiler_3.6.0
[57] GenomeInfoDb_1.22.1 rlang_0.4.0
[59] grid_3.6.0 RCurl_1.95-4.12
[61] rstudioapi_0.10 bitops_1.0-6
[63] base64enc_0.1-3 labeling_0.3
[65] rmarkdown_1.13 gtable_0.3.0
[67] reshape2_1.4.3 R6_2.4.0
[69] GenomicAlignments_1.22.1 lubridate_1.7.4
[71] knitr_1.23 rtracklayer_1.46.0
[73] stringi_1.4.3 parallel_3.6.0
[75] Rcpp_1.0.1 tidyselect_0.2.5
[77] xfun_0.8 BSgenome.Hsapiens.UCSC.hg38_1.4.1